# Google Fonts
library(showtext)
# Register the two families referenced by the plot theme in this script:
# Lato for numbers (axis tick labels), Assistant for all other text.
font_add_google(name = "Lato", family = "Lato")
font_add_google(name = "Assistant", family = "Assistant")
# Enable showtext so the registered fonts are used when plots are drawn.
showtext_auto(enable = TRUE)
# Read in all four datasets.

# Folder holding the raw CSV files. Defined once so the location only has to
# be changed in one place when the script is run on another machine.
data_dir <- "C:/Users/Adam Moffitt/OneDrive - West Chester University of PA/STA553/Week 5"

# Life expectancy, wide format. Recorded read_csv() output: 187 rows x 220
# columns (`geo` as character plus 219 numeric columns named by year,
# starting at 1800).
life_expectancy_years <- read_csv(file.path(data_dir, "life_expectancy_years.csv"))

# Income per person, wide format. Recorded output: 193 rows x 220 columns
# (`geo` plus 219 numeric year columns).
income_per_person <- read_csv(file.path(data_dir, "income_per_person.csv"))

# Country lookup table. Recorded output: 248 rows x 11 columns (7 character
# columns such as name, alpha-2, alpha-3, region, sub-region; 4 numeric code
# columns).
countries_total <- read_csv(file.path(data_dir, "countries_total.csv"))

# Total population, wide format. Recorded output: 195 rows x 220 columns
# (`geo` plus 219 numeric year columns).
population <- read_csv(file.path(data_dir, "population_total.csv"))
# Reshape the three wide tables (one column per year) into long format and
# join them into a single table keyed by country (`geo`) and `Year`.

# Stack every year column of a wide table into (geo, Year, <value_name>) rows.
#   wide       - data frame with a `geo` column plus one column per year
#   value_name - name (string) to give the stacked value column
# Returns a long tibble. Rows whose value is NA are dropped. `Year` holds the
# former column names, so it is character at this point. Rows come out
# ordered by country, then year.
stack_years <- function(wide, value_name) {
  pivot_longer(
    wide,
    cols = -geo, # geo identifies the row; every other column is stacked
    names_to = "Year",
    values_to = value_name,
    values_drop_na = TRUE
  )
}

lifeexpectancylong <- stack_years(life_expectancy_years, "LifeExp")
incomeperpersonlong <- stack_years(income_per_person, "Income")
populationlong <- stack_years(population, "Population")

# Combine the three long tables. Income is the left-hand table in both joins,
# so only country-years that have an income value are kept; LifeExp and
# Population are NA where the other tables have no matching row.
LifeExpIncom <- left_join(
  incomeperpersonlong,
  lifeexpectancylong,
  by = c("geo", "Year")
)
LifeExpIncom2 <- left_join(
  LifeExpIncom,
  populationlong,
  by = c("geo", "Year")
)



## Region lookup: one row per country with its name and region
subregions <- select(countries_total, name, region)

# Attach each country's region to build the final data set. `geo` in the
# combined table is matched against `name` in the lookup; rows with no
# matching name keep an NA region.
LifeExpIncomFinal <- left_join(
  LifeExpIncom2,
  subregions,
  by = c("geo" = "name")
)
## New names:
## * `` -> ...1
## * ...1 -> ...2
## * ...2 -> ...3
## * ...3 -> ...4
## * ...4 -> ...5
## Rows: 42486 Columns: 11
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): geo, region
## dbl (9): ...1, ...2, ...3, ...4, ...5, Year, Income, LifeExp, Population
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the finished data set from its location on GitHub. This replaces any
# `LifeExpIncomFinal` object already present in the session.
# Recorded read_csv() output: 42486 rows x 8 columns. Two unnamed columns are
# auto-named `...1` and `...2`; the rest are geo and region (character) plus
# Year, Income, LifeExp and Population (numeric).
LifeExpIncomFinal <- read_csv(
  "https://raw.githubusercontent.com/APM3030/STA553/main/homework3/LifeExpIncomFinal.csv"
)
# Convert Year to integer before animating.
# NOTE(review): presumably so the "{frame_time}" subtitle shows whole years
# rather than interpolated decimals - confirm against the rendered output.
LifeExpIncomFinal <- LifeExpIncomFinal %>%
  mutate(Year = as.integer(Year))

# Animated bubble chart: income (log scale) against life expectancy, one
# bubble per country, sized by population and coloured by region, with one
# animation frame per value of Year. The plot is not assigned, so it is
# printed when the script runs.
ggplot(
  LifeExpIncomFinal,
  aes(Income, LifeExp, size = Population, colour = region)
) +
  geom_point(alpha = 0.5) +
  # Hide the size legend; "none" replaces the deprecated `FALSE`.
  guides(size = "none") +
  scale_size(range = c(2, 12)) +
  scale_x_log10(labels = scales::dollar) +
  labs(
    title = "Relationship Between GDP and Life Expectancy",
    # {frame_time} is filled in by gganimate for each frame.
    subtitle = "Year: {frame_time}",
    x = "GDP Per Capita",
    y = "Life Expectancy in Years",
    colour = "Region:"
  ) +
  theme_minimal() +
  theme(
    axis.line = element_line(size = .7, linetype = "solid"),
    # Assistant for general text, Lato for the numeric axis labels; both are
    # the Google fonts registered at the top of the script.
    text = element_text(size = 18, family = "Assistant"),
    axis.title.y = element_text(vjust = -2),
    axis.title.x = element_text(vjust = 3),
    plot.title = element_text(face = "bold", size = 26, vjust = -3.5),
    plot.subtitle = element_text(vjust = -1),
    axis.text.x = element_text(family = "Lato", size = 16),
    axis.text.y = element_text(family = "Lato", size = 16),
    legend.title = element_text(face = "bold")
  ) +
  transition_time(Year)

# Subset the data to the year 2015
leifinal2015 <- LifeExpIncomFinal %>%
  filter(Year == 2015)

# Static 2015 version of the bubble chart. Points are coloured by country
# (`geo`) and the legend is hidden.
w <- ggplot(
  leifinal2015,
  aes(Income, LifeExp, size = Population, colour = geo)
) +
  geom_point(alpha = 0.5) +
  scale_size(range = c(2, 12)) +
  scale_x_log10(labels = scales::dollar) +
  labs(
    title = "Relationship Between GDP and Life Expectancy",
    subtitle = "Year: 2015",
    x = "GDP Per Capita",
    y = "Life Expectancy in Years"
  ) +
  theme_minimal() +
  theme(
    axis.line = element_line(size = .7, linetype = "solid"),
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(w)